{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34d2dc2f-dd54-4fd4-a223-6095d5c71109",
   "metadata": {},
   "outputs": [],
   "source": [
    "#This script reproduces the complete comparative analysis from the paper:\n",
    "#Comparing Weibull and Markov Models for Reliability and Risk-Based Asset Management of Wastewater Lift Stations\"\n",
    "\n",
    "ANALYSIS OVERVIEW:\n",
    "1. Data loading and preprocessing\n",
    "2. Weibull reliability model calibration (CV=0.20 → β=5.8)\n",
    "3. Markov deterioration model implementation (4-state: Good/Fair/Poor/Failed)\n",
    "4. Risk calculation and budget scenario simulation\n",
    "5. System efficiency modeling with pump redundancy\n",
    "6. Sensitivity analysis (MTTR, deterioration speed)\n",
    "7. Results export and visualization\n",
    "\n",
    "#METHODOLOGICAL NOTES FOR REVIEWERS:\n",
    "- Both models use identical input data and risk formulation (PoF × CoF)\n",
    "- Budget optimization uses identical greedy ΔRisk/$ algorithm\n",
    "- Markov dwell times: 40%/30%/30% of EUL for Good/Fair/Poor states\n",
    "- Weibull calibrated to CV=0.20 for consistent variance across asset types\n",
    "- Efficiency model assumes binomial distribution for redundant pump operation\n",
    "                          \n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from math import comb\n",
    "from scipy.special import gamma\n",
    "from scipy.optimize import fsolve\n",
    "import os\n",
    "\n",
    "# ==================== CONFIGURATION ====================\n",
    "class AnalysisConfig:\n",
    "    \"\"\"Centralized configuration for all analysis parameters\"\"\"\n",
    "    # Simulation parameters\n",
    "    YEARS = 10\n",
    "    BUDGET_SCENARIOS = [2_500_000, 5_000_000]  # Annual budgets in USD\n",
    "    HOURS_PER_YEAR = 8760.0\n",
    "    \n",
    "    # Weibull parameters\n",
    "    TARGET_CV = 0.20  # Target coefficient of variation\n",
    "    \n",
    "    # Markov parameters\n",
    "    DWELL_SPANS = {'G': 0.40, 'F': 0.30, 'P': 0.30}  # Fractions of EUL\n",
    "    STATE_NAMES = ['Good', 'Fair', 'Poor', 'Failed']\n",
    "    \n",
    "    # Asset class mappings\n",
    "    CLASS_MAP = {\n",
    "        'LS_PU': 'Pump', 'LS_CN': 'Controls', 'LS_SC': 'SCADA', \n",
    "        'LS_VV': 'Valve', 'LS_OD': 'Odor', 'LS_WW': 'WetWell',\n",
    "        'LS_PG': 'Power/Gen', 'LS_GN': 'General', 'LS_AE': 'Aeration'\n",
    "    }\n",
    "    \n",
    "    # Default parameters (representative values)\n",
    "    MTTR_DEFAULTS = {\n",
    "        'Pump': 12.0, 'Controls': 6.0, 'SCADA': 4.0, 'Odor': 12.0,\n",
    "        'Valve': 10.0, 'WetWell': 16.0, 'Power/Gen': 10.0, 'General': 10.0, \n",
    "        'Aeration': 12.0\n",
    "    }\n",
    "    \n",
    "    COST_DEFAULTS = {\n",
    "        'Pump': 80_000, 'Controls': 40_000, 'SCADA': 25_000, 'Odor': 60_000,\n",
    "        'Valve': 15_000, 'WetWell': 120_000, 'Power/Gen': 100_000, \n",
    "        'General': 50_000, 'Aeration': 50_000\n",
    "    }\n",
    "    \n",
    "    # Pump system assumptions\n",
    "    DEFAULT_PUMPS_TOTAL = 2\n",
    "    DEFAULT_PUMPS_DUTY = 1\n",
    "\n",
    "config = AnalysisConfig()\n",
    "\n",
    "# DATA PREPROCESSING \n",
    "def load_and_preprocess_data(file_path):\n",
    "    \"\"\"\n",
    "    Load and preprocess lift station data with comprehensive error handling\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Handles inconsistent column names across different data exports\n",
    "    - Converts all numeric fields with appropriate error handling\n",
    "    - Maps asset categories to standardized classes\n",
    "    - Fills missing values with reasonable defaults\n",
    "    \"\"\"\n",
    "    print(\"Loading and preprocessing data...\")\n",
    "    \n",
    "    # Load data\n",
    "    df = pd.read_csv(file_path)\n",
    "    \n",
    "    # Display initial data info\n",
    "    print(f\"Original data shape: {df.shape}\")\n",
    "    print(f\"Columns: {list(df.columns)}\")\n",
    "    \n",
    "    # Standardize column names (flexible handling for different data formats)\n",
    "    column_mapping = {}\n",
    "    for col in df.columns:\n",
    "        col_lower = col.lower()\n",
    "        if any(x in col_lower for x in ['assetid', 'id', 'asset']):\n",
    "            column_mapping[col] = 'AssetID'\n",
    "        elif any(x in col_lower for x in ['effectiveage', 'age', 'ea']):\n",
    "            column_mapping[col] = 'Age'\n",
    "        elif any(x in col_lower for x in ['eul', 'usefullife', 'life']):\n",
    "            column_mapping[col] = 'EUL'\n",
    "        elif any(x in col_lower for x in ['criticality', 'cof', 'consequence']):\n",
    "            column_mapping[col] = 'Criticality'\n",
    "        elif any(x in col_lower for x in ['lof', 'likelihood']):\n",
    "            column_mapping[col] = 'LOF'\n",
    "        elif any(x in col_lower for x in ['rr_facility', 'facility', 'station']):\n",
    "            column_mapping[col] = 'RR_Facility'\n",
    "        elif any(x in col_lower for x in ['category', 'type']):\n",
    "            column_mapping[col] = 'Category'\n",
    "    \n",
    "    df = df.rename(columns=column_mapping)\n",
    "    \n",
    "    # Ensure required columns exist\n",
    "    required_cols = ['AssetID', 'Category', 'RR_Facility']\n",
    "    missing_cols = [col for col in required_cols if col not in df.columns]\n",
    "    if missing_cols:\n",
    "        raise ValueError(f\"Missing required columns: {missing_cols}\")\n",
    "    \n",
    "    # Convert numeric columns with error handling\n",
    "    numeric_cols = ['Age', 'EUL', 'Criticality', 'LOF']\n",
    "    for col in numeric_cols:\n",
    "        if col in df.columns:\n",
    "            df[col] = pd.to_numeric(df[col], errors='coerce')\n",
    "    \n",
    "    # Remove assets with missing EUL (critical for analysis)\n",
    "    initial_count = len(df)\n",
    "    df = df.dropna(subset=['EUL'])\n",
    "    print(f\"Removed {initial_count - len(df)} assets with missing EUL\")\n",
    "    \n",
    "    # Fill missing values\n",
    "    df['Age'] = df['Age'].fillna(0)\n",
    "    df['Criticality'] = df['Criticality'].fillna(3.0)\n",
    "    df['LOF'] = df['LOF'].fillna(3)  # Default to medium likelihood\n",
    "    \n",
    "    # Add derived columns\n",
    "    df['Class'] = df['Category'].map(config.CLASS_MAP).fillna('General')\n",
    "    df['MTTR_h'] = df['Class'].map(config.MTTR_DEFAULTS)\n",
    "    df['UnitCost'] = df['Class'].map(config.COST_DEFAULTS)\n",
    "    \n",
    "    print(f\"Final processed data shape: {df.shape}\")\n",
    "    print(f\"Lift stations: {df['RR_Facility'].nunique()}\")\n",
    "    print(f\"Asset categories: {df['Category'].value_counts().to_dict()}\")\n",
    "    \n",
    "    return df\n",
    "\n",
    "# WEIBULL MODEL \n",
    "\n",
    "def calibrate_weibull_parameters(df, target_cv=0.20):\n",
    "    \"\"\"\n",
    "    Calibrate Weibull parameters for reliability analysis\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Weibull survival function: R(t) = exp(-(t/η)^β)\n",
    "    - CV (coefficient of variation) = σ/μ for failure time distribution\n",
    "    - Relationship: CV = sqrt(Γ(1+2/β) - Γ(1+1/β)^2) / Γ(1+1/β)\n",
    "    - We solve for β that gives target CV, then calculate η = EUL / Γ(1+1/β)\n",
    "    - This ensures all assets have consistent failure time variance (CV=0.20)\n",
    "    \"\"\"\n",
    "    print(\"\\nCalibrating Weibull parameters...\")\n",
    "    \n",
    "    def cv_from_beta(beta):\n",
    "        \"\"\"Calculate coefficient of variation from Weibull shape parameter\"\"\"\n",
    "        return np.sqrt(gamma(1 + 2/beta) - gamma(1 + 1/beta)**2) / gamma(1 + 1/beta)\n",
    "    \n",
    "    # Solve for beta that gives target CV\n",
    "    beta_global = float(fsolve(lambda b: cv_from_beta(b) - target_cv, x0=5.0)[0])\n",
    "    \n",
    "    # Calculate scale parameter for each asset\n",
    "    df['beta'] = beta_global\n",
    "    df['eta'] = df['EUL'] / gamma(1 + 1/df['beta'])\n",
    "    \n",
    "    print(f\"Global Weibull parameters: β = {beta_global:.3f}, CV = {target_cv}\")\n",
    "    print(f\"η range: {df['eta'].min():.1f} - {df['eta'].max():.1f} years\")\n",
    "    \n",
    "    return df, beta_global\n",
    "\n",
    "def calculate_weibull_reliability(df, years=0):\n",
    "    \"\"\"\n",
    "    Calculate Weibull reliability and probability of failure\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Effective age increases each year for projection\n",
    "    - PoF = 1 - exp(-(Age/η)^β) for current reliability\n",
    "    - For projections: PoF(t) = 1 - exp(-((Age + t)/η)^β)\n",
    "    - Risk = PoF × Consequence (Criticality score)\n",
    "    \"\"\"\n",
    "    effective_age = df['Age'] + years\n",
    "    reliability = np.exp(-((effective_age / df['eta']) ** df['beta']))\n",
    "    pof = 1 - reliability\n",
    "    risk = pof * df['Criticality']\n",
    "    \n",
    "    return pof, risk, reliability\n",
    "\n",
    "# MARKOV MODEL\n",
    "\n",
    "def build_markov_transition_matrix(eul, speed_factor=1.0):\n",
    "    \"\"\"\n",
    "    Build Markov transition probability matrix\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - 4 states: Good (0), Fair (1), Poor (2), Failed (3)\n",
    "    - Failed state is absorbing (probability 1.0 to remain)\n",
    "    - Dwell times proportional to EUL: 40% Good, 30% Fair, 30% Poor\n",
    "    - Transition probability P_ij = 1 - exp(-1/dwell_time)\n",
    "    - speed_factor scales all transition rates (sensitivity parameter)\n",
    "    \"\"\"\n",
    "    # Calculate dwell times in years\n",
    "    dwell_times = {state: max(1e-6, span * eul / speed_factor) \n",
    "                   for state, span in config.DWELL_SPANS.items()}\n",
    "    \n",
    "    # Calculate annual transition probabilities\n",
    "    p_GF = 1 - np.exp(-1.0 / dwell_times['G'])  # Good → Fair\n",
    "    p_FP = 1 - np.exp(-1.0 / dwell_times['F'])  # Fair → Poor  \n",
    "    p_PFail = 1 - np.exp(-1.0 / dwell_times['P'])  # Poor → Failed\n",
    "    \n",
    "    # Build transition matrix\n",
    "    T = np.array([\n",
    "        [1 - p_GF, p_GF, 0.0, 0.0],           # Good row\n",
    "        [0.0, 1 - p_FP, p_FP, 0.0],           # Fair row\n",
    "        [0.0, 0.0, 1 - p_PFail, p_PFail],     # Poor row\n",
    "        [0.0, 0.0, 0.0, 1.0]                  # Failed row (absorbing)\n",
    "    ])\n",
    "    \n",
    "    return T\n",
    "\n",
    "def initialize_markov_state(pct_life_used):\n",
    "    \"\"\"\n",
    "    Initialize Markov state based on percentage of life used\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Maps LOF (1-5) or percentage life used to initial state probabilities\n",
    "    - LOF 1-2: Good, LOF 3: Fair, LOF 4: Poor, LOF 5: Failed\n",
    "    - Uses deterministic assignment for clean initial conditions\n",
    "    \"\"\"\n",
    "    if pct_life_used < 0.4:    # 0-40% life used\n",
    "        return np.array([1.0, 0.0, 0.0, 0.0])  # Good\n",
    "    elif pct_life_used < 0.7:  # 40-70% life used  \n",
    "        return np.array([0.0, 1.0, 0.0, 0.0])  # Fair\n",
    "    elif pct_life_used < 1.0:  # 70-100% life used\n",
    "        return np.array([0.0, 0.0, 1.0, 0.0])  # Poor\n",
    "    else:                       # ≥100% life used\n",
    "        return np.array([0.0, 0.0, 0.0, 1.0])  # Failed\n",
    "\n",
    "def simulate_markov_trajectory(df, years=10, speed_factor=1.0, renewal_years=None):\n",
    "    \"\"\"\n",
    "    Simulate Markov deterioration for all assets\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - State evolution: p_{t+1} = p_t × T (matrix multiplication)\n",
    "    - Renewal resets state to Good (perfect renewal assumption)\n",
    "    - Returns probability of failure over time for each asset\n",
    "    - Also returns full state evolution for detailed analysis\n",
    "    \"\"\"\n",
    "    renewal_years = set(renewal_years or [])\n",
    "    \n",
    "    fail_probs = []\n",
    "    state_evolution = []\n",
    "    \n",
    "    for _, asset in df.iterrows():\n",
    "        T = build_markov_transition_matrix(asset['EUL'], speed_factor)\n",
    "        pct_life_used = asset['Age'] / asset['EUL']\n",
    "        current_state = initialize_markov_state(pct_life_used)\n",
    "        \n",
    "        state_history = [current_state.copy()]\n",
    "        \n",
    "        for year in range(1, years + 1):\n",
    "            # State transition\n",
    "            current_state = current_state @ T\n",
    "            \n",
    "            # Renewal at year boundaries\n",
    "            if year in renewal_years:\n",
    "                current_state = np.array([1.0, 0.0, 0.0, 0.0])  # Reset to Good\n",
    "            \n",
    "            state_history.append(current_state.copy())\n",
    "        \n",
    "        state_array = np.array(state_history)\n",
    "        fail_probs.append(state_array[:, 3])  # Probability in Failed state\n",
    "        state_evolution.append(state_array)\n",
    "    \n",
    "    return np.array(fail_probs), np.array(state_evolution)\n",
    "\n",
    "# RISK & BUDGET OPTIMIZATION\n",
    "\n",
    "def greedy_risk_reduction_optimization(df, pof_current, risk_current, budget):\n",
    "    \"\"\"\n",
    "    Greedy optimization for risk reduction per dollar spent\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Benefit = current risk (risk reduction if renewed now)\n",
    "    - Cost = unit replacement cost\n",
    "    - BCP (Benefit-Cost Priority) = Risk / Cost\n",
    "    - Selects assets in descending BCP order until budget exhausted\n",
    "    - This is the same algorithm used in the City's IAMS system\n",
    "    \"\"\"\n",
    "    benefit = risk_current  # Risk reduction from renewal\n",
    "    cost = df['UnitCost'].values\n",
    "    \n",
    "    # Calculate benefit-cost ratio\n",
    "    with np.errstate(divide='ignore', invalid='ignore'):\n",
    "        bcp = np.where(cost > 0, benefit / cost, 0)\n",
    "    \n",
    "    # Sort by BCP (descending)\n",
    "    sort_order = np.argsort(-bcp)\n",
    "    \n",
    "    selected_indices = []\n",
    "    spent = 0.0\n",
    "    \n",
    "    for idx in sort_order:\n",
    "        if spent + cost[idx] <= budget:\n",
    "            selected_indices.append(idx)\n",
    "            spent += cost[idx]\n",
    "    \n",
    "    return set(selected_indices), spent\n",
    "\n",
    "def run_budget_scenarios(df, model_type, budgets, years=10):\n",
    "    \"\"\"\n",
    "    Run budget scenarios for a given reliability model\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Simulates annual budget allocation and renewal decisions\n",
    "    - Tracks system risk under do-nothing and funded scenarios\n",
    "    - Renewed assets have PoF reset (Weibull: age=0, Markov: state=Good)\n",
    "    - Results show how different funding levels affect risk trajectory\n",
    "    \"\"\"\n",
    "    scenario_results = {}\n",
    "    \n",
    "    for budget in budgets:\n",
    "        print(f\"Running {model_type} model with ${budget/1e6:.1f}M/year budget...\")\n",
    "        \n",
    "        yearly_metrics = []\n",
    "        current_assets = df.copy()\n",
    "        \n",
    "        for year in range(years + 1):\n",
    "            # Calculate current risk\n",
    "            if model_type == 'Weibull':\n",
    "                pof_current, risk_current, _ = calculate_weibull_reliability(current_assets, year)\n",
    "            else:  # Markov\n",
    "                pof_current, _ = simulate_markov_trajectory(current_assets, year, speed_factor=1.0)\n",
    "                risk_current = pof_current[:, -1] * current_assets['Criticality'].values\n",
    "            \n",
    "            risk_total = risk_current.sum()\n",
    "            \n",
    "            # Apply renewals (except year 0)\n",
    "            if year == 0:\n",
    "                renewed_indices = set()\n",
    "                spent = 0.0\n",
    "                risk_post = risk_total\n",
    "            else:\n",
    "                renewed_indices, spent = greedy_risk_reduction_optimization(\n",
    "                    current_assets, pof_current, risk_current, budget\n",
    "                )\n",
    "                \n",
    "                # Calculate post-renewal risk\n",
    "                risk_post = risk_total\n",
    "                for idx in renewed_indices:\n",
    "                    risk_post -= risk_current[idx]  # Remove risk from renewed assets\n",
    "            \n",
    "            yearly_metrics.append({\n",
    "                'Year': year,\n",
    "                'Budget': budget,\n",
    "                'Risk_Do_Nothing': risk_total,\n",
    "                'Risk_Post_Renewal': risk_post,\n",
    "                'Spent': spent,\n",
    "                'Assets_Renewed': len(renewed_indices)\n",
    "            })\n",
    "        \n",
    "        scenario_results[budget] = pd.DataFrame(yearly_metrics)\n",
    "    \n",
    "    return scenario_results\n",
    "\n",
    "# = EFFICIENCY MODEL\n",
    "\n",
    "def calculate_station_efficiency(df, availability_col):\n",
    "    \"\"\"\n",
    "    Calculate lift station efficiency considering pump redundancy\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Models n pumps with d duty pumps required for full capacity\n",
    "    - Pump states follow binomial distribution: Binomial(n, A)\n",
    "    - Efficiency = Σ P(k operational) × min(1, k/d)\n",
    "    - P_full = probability that at least d pumps are operational\n",
    "    - This captures the reliability benefit of redundancy\n",
    "    \"\"\"\n",
    "    results = []\n",
    "    \n",
    "    # Group by lift station\n",
    "    for station, assets in df.groupby('RR_Facility'):\n",
    "        # Get pump assets only\n",
    "        pumps = assets[assets['Class'] == 'Pump']\n",
    "        if pumps.empty:\n",
    "            continue\n",
    "            \n",
    "        # Calculate average pump availability for this station\n",
    "        avg_availability = pumps[availability_col].mean()\n",
    "        n_pumps = max(config.DEFAULT_PUMPS_TOTAL, len(pumps))\n",
    "        duty_pumps = config.DEFAULT_PUMPS_DUTY\n",
    "        \n",
    "        # Calculate efficiency using binomial distribution\n",
    "        efficiency = 0.0\n",
    "        p_full = 0.0\n",
    "        \n",
    "        for k in range(n_pumps + 1):\n",
    "            # Probability of exactly k pumps operational\n",
    "            prob_k = comb(n_pumps, k) * (avg_availability ** k) * ((1 - avg_availability) ** (n_pumps - k))\n",
    "            \n",
    "            # Efficiency contribution\n",
    "            eff_contribution = min(1.0, k / duty_pumps)\n",
    "            efficiency += eff_contribution * prob_k\n",
    "            \n",
    "            # Probability of full service\n",
    "            if k >= duty_pumps:\n",
    "                p_full += prob_k\n",
    "        \n",
    "        results.append({\n",
    "            'RR_Facility': station,\n",
    "            'Pump_Count': len(pumps),\n",
    "            'Avg_Availability': avg_availability,\n",
    "            'Efficiency': efficiency,\n",
    "            'P_Full_Service': p_full\n",
    "        })\n",
    "    \n",
    "    return pd.DataFrame(results)\n",
    "\n",
    "# SENSITIVITY ANALYSIS\n",
    "\n",
    "def run_sensitivity_analysis(df, mttr_values, speed_factors):\n",
    "    \"\"\"\n",
    "    Run comprehensive sensitivity analysis\n",
    "    \n",
    "    FOR REVIEWERS:\n",
    "    - Tests model sensitivity to key parameters:\n",
    "      * MTTR (Mean Time To Repair): 8, 12, 24 hours\n",
    "      * Markov speed factor: 0.8, 1.0, 1.2 (slower/normal/faster deterioration)\n",
    "    - Evaluates impact on pump availability and system efficiency\n",
    "    - Shows robustness of conclusions to parameter uncertainty\n",
    "    \"\"\"\n",
    "    print(\"\\nRunning sensitivity analysis...\")\n",
    "    \n",
    "    sensitivity_results = []\n",
    "    \n",
    "    for mttr in mttr_values:\n",
    "        for speed in speed_factors:\n",
    "            # Calculate availability with modified MTTR\n",
    "            def calculate_availability(pof, asset_class):\n",
    "                base_mttr = config.MTTR_DEFAULTS.get(asset_class, 10.0)\n",
    "                actual_mttr = mttr if asset_class == 'Pump' else base_mttr\n",
    "                failure_rate = -np.log(np.clip(1 - pof, 1e-9, 1.0)) / config.HOURS_PER_YEAR\n",
    "                unavailability = failure_rate * actual_mttr\n",
    "                return np.clip(1.0 - unavailability, 0.0, 1.0)\n",
    "            \n",
    "            # Weibull baseline availability\n",
    "            pof_weibull, _, _ = calculate_weibull_reliability(df, 0)\n",
    "            avail_weibull = [calculate_availability(pof, cls) \n",
    "                           for pof, cls in zip(pof_weibull, df['Class'])]\n",
    "            \n",
    "            # Markov availability at year 1\n",
    "            pof_markov, _ = simulate_markov_trajectory(df, 1, speed)\n",
    "            avail_markov = [calculate_availability(pof, cls) \n",
    "                          for pof, cls in zip(pof_markov[:, -1], df['Class'])]\n",
    "            \n",
    "            # Calculate pump-specific metrics\n",
    "            pump_mask = df['Class'] == 'Pump'\n",
    "            avg_avail_weibull = np.mean(np.array(avail_weibull)[pump_mask])\n",
    "            avg_avail_markov = np.mean(np.array(avail_markov)[pump_mask])\n",
    "            \n",
    "            # Calculate system efficiency\n",
    "            df_weibull = df.copy()\n",
    "            df_weibull['Availability'] = avail_weibull\n",
    "            df_markov = df.copy()  \n",
    "            df_markov['Availability'] = avail_markov\n",
    "            \n",
    "            eff_weibull = calculate_station_efficiency(df_weibull, 'Availability')\n",
    "            eff_markov = calculate_station_efficiency(df_markov, 'Availability')\n",
    "            \n",
    "            avg_eff_weibull = eff_weibull['Efficiency'].mean()\n",
    "            avg_eff_markov = eff_markov['Efficiency'].mean()\n",
    "            \n",
    "            sensitivity_results.append({\n",
    "                'MTTR_hours': mttr,\n",
    "                'Speed_Factor': speed,\n",
    "                'Avg_Pump_Availability_Weibull': avg_avail_weibull,\n",
    "                'Avg_Pump_Availability_Markov': avg_avail_markov,\n",
    "                'Avg_Efficiency_Weibull': avg_eff_weibull,\n",
    "                'Avg_Efficiency_Markov': avg_eff_markov\n",
    "            })\n",
    "    \n",
    "    return pd.DataFrame(sensitivity_results)\n",
    "\n",
    "# VISUALIZATION\n",
    "\n",
    "def create_summary_plots(df, weibull_results, markov_results, output_dir):\n",
    "    \"\"\"Create comprehensive visualization of results\"\"\"\n",
    "    print(\"\\nGenerating summary plots...\")\n",
    "    \n",
    "    # Set style\n",
    "    plt.style.use('default')\n",
    "    sns.set_palette(\"husl\")\n",
    "    \n",
    "    # 1. System Risk Trajectories\n",
    "    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n",
    "    \n",
    "    # Weibull trajectories\n",
    "    for budget, results in weibull_results.items():\n",
    "        ax1.plot(results['Year'], results['Risk_Post_Renewal'], \n",
    "                label=f'${budget/1e6:.1f}M/yr', linewidth=2)\n",
    "    ax1.set_title('Weibull Model: System Risk Trajectories')\n",
    "    ax1.set_xlabel('Year')\n",
    "    ax1.set_ylabel('Total System Risk')\n",
    "    ax1.legend()\n",
    "    ax1.grid(True, alpha=0.3)\n",
    "    \n",
    "    # Markov trajectories  \n",
    "    for budget, results in markov_results.items():\n",
    "        ax2.plot(results['Year'], results['Risk_Post_Renewal'],\n",
    "                label=f'${budget/1e6:.1f}M/yr', linewidth=2)\n",
    "    ax2.set_title('Markov Model: System Risk Trajectories')\n",
    "    ax2.set_xlabel('Year')\n",
    "    ax2.set_ylabel('Total System Risk')\n",
    "    ax2.legend()\n",
    "    ax2.grid(True, alpha=0.3)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.savefig(f'{output_dir}/system_risk_trajectories.png', dpi=300, bbox_inches='tight')\n",
    "    plt.close()\n",
    "    \n",
    "    # 2. Category-wise PoF Comparison\n",
    "    categories = df['Category'].unique()\n",
    "    years = range(config.YEARS + 1)\n",
    "    \n",
    "    fig, axes = plt.subplots(3, 3, figsize=(15, 12))\n",
    "    axes = axes.flatten()\n",
    "    \n",
    "    for i, category in enumerate(categories[:9]):  # Plot first 9 categories\n",
    "        cat_assets = df[df['Category'] == category]\n",
    "        \n",
    "        # Calculate PoF trajectories\n",
    "        weibull_pof = []\n",
    "        markov_pof = []\n",
    "        \n",
    "        for year in years:\n",
    "            pof_w, _, _ = calculate_weibull_reliability(cat_assets, year)\n",
    "            pof_m, _ = simulate_markov_trajectory(cat_assets, year)\n",
    "            \n",
    "            weibull_pof.append(pof_w.mean())\n",
    "            markov_pof.append(pof_m[:, -1].mean())\n",
    "        \n",
    "        axes[i].plot(years, weibull_pof, label='Weibull', linewidth=2)\n",
    "        axes[i].plot(years, markov_pof, label='Markov', linewidth=2)\n",
    "        axes[i].set_title(f'{category} (n={len(cat_assets)})')\n",
    "        axes[i].set_xlabel('Year')\n",
    "        axes[i].set_ylabel('Mean PoF')\n",
    "        axes[i].legend()\n",
    "        axes[i].grid(True, alpha=0.3)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.savefig(f'{output_dir}/category_pof_trajectories.png', dpi=300, bbox_inches='tight')\n",
    "    plt.close()\n",
    "    \n",
    "    print(\"Visualization complete.\")\n",
    "\n",
    "#  MAIN ANALYSIS ====================\n",
    "\n",
    "def main():\n",
    "    \"\"\"Execute complete comparative analysis\"\"\"\n",
    "    print(\"=\" * 70)\n",
    "    print(\"WEIBULL vs MARKOV RELIABILITY ANALYSIS FOR LIFT STATIONS\")\n",
    "    print(\"=\" * 70)\n",
    "    \n",
    "    # Create output directory\n",
    "    output_dir = \"analysis_results\"\n",
    "    os.makedirs(output_dir, exist_join=True)\n",
    "    \n",
    "    # Load data (use synthetic data for reproduction)\n",
    "    try:\n",
    "        # Try synthetic data first\n",
    "        data_file = \"synthetic_lift_station_data.csv\"\n",
    "        assets_df = load_and_preprocess_data(data_file)\n",
    "    except FileNotFoundError:\n",
    "        print(\"Synthetic data file not found. Please run generate_synthetic_data.py first.\")\n",
    "        return\n",
    "    \n",
    "    # 1. Weibull Model Calibration\n",
    "    assets_df, beta_global = calibrate_weibull_parameters(assets_df, config.TARGET_CV)\n",
    "    \n",
    "    # Calculate baseline reliability\n",
    "    pof_weibull_baseline, risk_weibull_baseline, _ = calculate_weibull_reliability(assets_df, 0)\n",
    "    assets_df['PoF_Weibull'] = pof_weibull_baseline\n",
    "    assets_df['Risk_Weibull'] = risk_weibull_baseline\n",
    "    \n",
    "    # 2. Markov Model Simulation\n",
    "    print(\"\\nRunning Markov model simulation...\")\n",
    "    pof_markov_baseline, state_evolution = simulate_markov_trajectory(assets_df, 0)\n",
    "    assets_df['PoF_Markov'] = pof_markov_baseline[:, -1]\n",
    "    assets_df['Risk_Markov'] = assets_df['PoF_Markov'] * assets_df['Criticality']\n",
    "    \n",
    "    # 3. Budget Scenario Analysis\n",
    "    print(\"\\nRunning budget scenario analysis...\")\n",
    "    weibull_scenarios = run_budget_scenarios(assets_df, 'Weibull', config.BUDGET_SCENARIOS)\n",
    "    markov_scenarios = run_budget_scenarios(assets_df, 'Markov', config.BUDGET_SCENARIOS)\n",
    "    \n",
    "    # 4. Efficiency Analysis\n",
    "    print(\"\\nCalculating system efficiency...\")\n",
    "    assets_df['Availability_Weibull'] = 1 - (assets_df['PoF_Weibull'] * assets_df['MTTR_h'] / config.HOURS_PER_YEAR)\n",
    "    assets_df['Availability_Markov'] = 1 - (assets_df['PoF_Markov'] * assets_df['MTTR_h'] / config.HOURS_PER_YEAR)\n",
    "    \n",
    "    efficiency_weibull = calculate_station_efficiency(assets_df, 'Availability_Weibull')\n",
    "    efficiency_markov = calculate_station_efficiency(assets_df, 'Availability_Markov')\n",
    "    \n",
    "    # 5. Sensitivity Analysis\n",
    "    print(\"\\nRunning sensitivity analysis...\")\n",
    "    sensitivity_results = run_sensitivity_analysis(\n",
    "        assets_df, \n",
    "        mttr_values=[8, 12, 24], \n",
    "        speed_factors=[0.8, 1.0, 1.2]\n",
    "    )\n",
    "    \n",
    "    # 6. Create Visualizations\n",
    "    create_summary_plots(assets_df, weibull_scenarios, markov_scenarios, output_dir)\n",
    "    \n",
    "    # 7. Export Results\n",
    "    print(\"\\nExporting results...\")\n",
    "    \n",
    "    # Main results\n",
    "    assets_df.to_csv(f'{output_dir}/asset_analysis_results.csv', index=False)\n",
    "    \n",
    "    # Scenario results\n",
    "    for budget, results in weibull_scenarios.items():\n",
    "        results.to_csv(f'{output_dir}/weibull_scenario_{budget/1e6:.1f}M.csv', index=False)\n",
    "    \n",
    "    for budget, results in markov_scenarios.items():\n",
    "        results.to_csv(f'{output_dir}/markov_scenario_{budget/1e6:.1f}M.csv', index=False)\n",
    "    \n",
    "    # Efficiency results\n",
    "    efficiency_weibull.to_csv(f'{output_dir}/efficiency_weibull.csv', index=False)\n",
    "    efficiency_markov.to_csv(f'{output_dir}/efficiency_markov.csv', index=False)\n",
    "    \n",
    "    # Sensitivity results\n",
    "    sensitivity_results.to_csv(f'{output_dir}/sensitivity_analysis.csv', index=False)\n",
    "    \n",
    "    # Analysis summary\n",
    "    summary = {\n",
    "        'Total_Assets': len(assets_df),\n",
    "        'Total_Stations': assets_df['RR_Facility'].nunique(),\n",
    "        'Weibull_Beta': beta_global,\n",
    "        'Weibull_CV': config.TARGET_CV,\n",
    "        'Analysis_Years': config.YEARS,\n",
    "        'Baseline_Risk_Weibull': assets_df['Risk_Weibull'].sum(),\n",
    "        'Baseline_Risk_Markov': assets_df['Risk_Markov'].sum()\n",
    "    }\n",
    "    \n",
    "    pd.Series(summary).to_csv(f'{output_dir}/analysis_summary.csv')\n",
    "    \n",
    "    print(\"\\n\" + \"=\" * 70)\n",
    "    print(\"ANALYSIS COMPLETE\")\n",
    "    print(\"=\" * 70)\n",
    "    print(f\"Results saved to: {output_dir}/\")\n",
    "    print(f\"- Asset-level results: asset_analysis_results.csv\")\n",
    "    print(f\"- Budget scenarios: weibull_scenario_*.csv, markov_scenario_*.csv\")\n",
    "    print(f\"- Efficiency analysis: efficiency_*.csv\")\n",
    "    print(f\"- Sensitivity analysis: sensitivity_analysis.csv\")\n",
    "    print(f\"- Visualizations: *.png\")\n",
    "    print(f\"- Summary statistics: analysis_summary.csv\")\n",
    "    \n",
    "    # Display key findings\n",
    "    print(\"\\nKEY FINDINGS:\")\n",
    "    print(f\"- Total assets analyzed: {len(assets_df)}\")\n",
    "    print(f\"- Lift stations: {assets_df['RR_Facility'].nunique()}\")\n",
    "    print(f\"- Weibull shape parameter (β): {beta_global:.3f}\")\n",
    "    print(f\"- Baseline system risk (Weibull): {assets_df['Risk_Weibull'].sum():.2f}\")\n",
    "    print(f\"- Baseline system risk (Markov): {assets_df['Risk_Markov'].sum():.2f}\")\n",
    "    print(f\"- Mean station efficiency (Weibull): {efficiency_weibull['Efficiency'].mean():.3f}\")\n",
    "    print(f\"- Mean station efficiency (Markov): {efficiency_markov['Efficiency'].mean():.3f}\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
